full_data <- readRDS('data/full_data.rds')
H2S_dm <- full_data %>%
group_by(day, Monitor) %>%
summarise(H2S_daily_max = max(H2S, na.rm = TRUE)) %>%
mutate(H2S_daily_max = if_else(H2S_daily_max == -Inf, NA, H2S_daily_max))
## Warning: There were 501 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `H2S_daily_max = max(H2S, na.rm = TRUE)`.
## ℹ In group 1215: `day = 2020-07-12`, `Monitor = "First Methodist"`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf
## ℹ Run ]8;;ide:run:dplyr::last_dplyr_warnings()dplyr::last_dplyr_warnings()]8;; to see the 500 remaining warnings.
## `summarise()` has grouped output by 'day'. You can override using the `.groups`
## argument.
H2S_ma <- H2S_dm %>%
mutate(yearmonth = format(day, "%Y-%m")) %>%
group_by(yearmonth, Monitor) %>%
summarise(H2S_monthly_average = mean(H2S_daily_max, na.rm = TRUE))
## `summarise()` has grouped output by 'yearmonth'. You can override using the
## `.groups` argument.
H2S_dm_graph <- H2S_dm %>%
ggplot(aes(x=day, y=H2S_daily_max, group=Monitor, color=Monitor)) +
geom_line() +
labs(title = "Daily Max H2S concentration by monitor", y = 'Daily Max H2S', x = 'time') +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45))
ggplotly(H2S_dm_graph) %>% layout(dragmode = 'pan')
H2S_dm_graph_b <- H2S_dm %>%
ggplot(aes(x=day, y=H2S_daily_max, group=Monitor, color=Monitor)) +
geom_line() +
labs(title = "Daily Max H2S concentration by monitor w/o outlier", y = 'Daily Max H2S', x = 'time') +
scale_y_continuous(limits = c(0, 100)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45))
ggplotly(H2S_dm_graph_b) %>% layout(dragmode = 'pan')
H2S_ma_graph <- H2S_ma %>%
ggplot(aes(x=yearmonth, y=H2S_monthly_average, group=Monitor, color=Monitor)) +
geom_line() +
labs(title = "Monthly Average H2S concentration by monitor", y = 'Monthly Average H2S', x = 'time') +
scale_x_discrete(breaks = unique(H2S_ma$yearmonth)[seq(1, length(unique(H2S_ma$yearmonth)), by = 6)]) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45))
ggplotly(H2S_ma_graph) %>% layout(dragmode = 'pan')
H2S_ma_graph_b <- H2S_ma %>%
ggplot(aes(x=yearmonth, y=H2S_monthly_average, group=Monitor, color=Monitor)) +
geom_line() +
labs(title = "Monthly Average H2S concentration by monitor w/o outlier", y = 'Monthly Average H2S', x = 'time') +
scale_x_discrete(breaks = unique(H2S_ma$yearmonth)[seq(1, length(unique(H2S_ma$yearmonth)), by = 6)]) +
scale_y_continuous(limits=c(0, 50)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45))
ggplotly(H2S_ma_graph_b) %>% layout(dragmode = 'pan')
The line with the largest deviation/peak corresponds to the 213th&Chico monitor, in 2021-10
full_data %>%
polarPlot(pollutant = "H2S", col = "turbo",
key.position = "bottom",
key.header = "mean H2S",
key.footer = NULL, title = 'hi')
h2s_model_a <- gam(H2S~s(as.numeric(month),bs='cc')+year+wd_sec+ws+I(1/MinDist^2)+Refinery, data = full_data)
summary(h2s_model_a)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## H2S ~ s(as.numeric(month), bs = "cc") + year + wd_sec + ws +
## I(1/MinDist^2) + Refinery
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.091e-01 1.084e+00 0.839 0.4016
## year2021 3.076e-01 1.082e+00 0.284 0.7763
## year2022 -7.302e-01 1.083e+00 -0.675 0.5000
## wd_secQ2 8.995e-02 4.755e-02 1.892 0.0585 .
## wd_secQ3 8.260e-01 5.171e-02 15.974 < 2e-16 ***
## wd_secQ4 4.469e-01 4.417e-02 10.118 < 2e-16 ***
## ws -9.682e-02 5.944e-03 -16.290 < 2e-16 ***
## I(1/MinDist^2) 2.222e+04 3.730e+04 0.596 0.5514
## RefineryMarathon (Carson) 2.989e+00 5.907e-02 50.603 < 2e-16 ***
## RefineryMarathon (Wilmington) 8.388e-01 6.456e-02 12.993 < 2e-16 ***
## RefineryPhillips 66 (Wilimington) -5.341e-01 5.964e-02 -8.955 < 2e-16 ***
## RefineryPhillips 66 (Wilmington) 3.880e-01 6.332e-02 6.127 8.94e-10 ***
## RefineryTorrance Refinery 8.476e-02 6.389e-02 1.327 0.1846
## RefineryValero Refinery 5.988e-01 6.286e-02 9.526 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(month)) 7.996 8 881.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.00848 Deviance explained = 0.849%
## GCV = 336.53 Scale est. = 336.52 n = 1730935
plot(h2s_model_a)
h2s_model_b <- gam(H2S~s(as.numeric(month),bs='cc')+year+wd_sec+ws+I(1/MinDist^2)+
Refinery+s(latitude, bs = 'tp')+s(longitude, bs='tp'),
data = full_data)
summary(h2s_model_b)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## H2S ~ s(as.numeric(month), bs = "cc") + year + wd_sec + ws +
## I(1/MinDist^2) + Refinery + s(latitude, bs = "tp") + s(longitude,
## bs = "tp")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.421e+02 1.395e+01 -10.184 < 2e-16 ***
## year2021 6.814e-01 1.066e+00 0.639 0.5227
## year2022 2.109e-01 1.066e+00 0.198 0.8432
## wd_secQ2 2.133e-01 4.684e-02 4.553 5.28e-06 ***
## wd_secQ3 5.200e-01 5.096e-02 10.204 < 2e-16 ***
## wd_secQ4 5.159e-01 4.353e-02 11.852 < 2e-16 ***
## ws -1.081e-01 5.855e-03 -18.465 < 2e-16 ***
## I(1/MinDist^2) 6.171e+06 1.672e+05 36.917 < 2e-16 ***
## RefineryMarathon (Carson) 1.090e+02 1.552e+01 7.021 2.21e-12 ***
## RefineryMarathon (Wilmington) 1.434e+02 1.604e+01 8.938 < 2e-16 ***
## RefineryPhillips 66 (Wilimington) 1.913e+02 1.550e+01 12.339 < 2e-16 ***
## RefineryPhillips 66 (Wilmington) 2.008e+02 1.586e+01 12.662 < 2e-16 ***
## RefineryTorrance Refinery 2.543e+01 1.177e+01 2.161 0.0307 *
## RefineryValero Refinery 1.878e+02 1.642e+01 11.434 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(month)) 7.999 8 1019.368 <2e-16 ***
## s(latitude) 1.000 1 2698.973 <2e-16 ***
## s(longitude) 1.000 1 6.084 0.0156 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0381 Deviance explained = 3.82%
## GCV = 326.46 Scale est. = 326.45 n = 1730935
plot(h2s_model_b)
h2s_dm_model_a <- gam(H2S_daily_max~s(as.numeric(month),bs='cc')+year+wd_sec+ws+I(1/MinDist^2)+Refinery, data = full_data)
summary(h2s_dm_model_a)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## H2S_daily_max ~ s(as.numeric(month), bs = "cc") + year + wd_sec +
## ws + I(1/MinDist^2) + Refinery
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.024e+00 5.659e+00 0.888 0.37466
## year2021 1.330e+00 5.651e+00 0.235 0.81393
## year2022 -8.367e+00 5.652e+00 -1.480 0.13878
## wd_secQ2 -1.341e+00 2.445e-01 -5.484 4.16e-08 ***
## wd_secQ3 3.643e+00 2.663e-01 13.679 < 2e-16 ***
## wd_secQ4 -2.626e-01 2.275e-01 -1.154 0.24832
## ws 4.156e-02 3.054e-02 1.361 0.17364
## I(1/MinDist^2) 9.242e+04 1.926e+05 0.480 0.63123
## RefineryMarathon (Carson) 2.569e+01 3.040e-01 84.515 < 2e-16 ***
## RefineryMarathon (Wilmington) 4.007e+00 3.327e-01 12.043 < 2e-16 ***
## RefineryPhillips 66 (Wilimington) -8.985e-01 3.075e-01 -2.922 0.00348 **
## RefineryPhillips 66 (Wilmington) 3.154e+00 3.267e-01 9.656 < 2e-16 ***
## RefineryTorrance Refinery 8.734e-01 3.292e-01 2.653 0.00799 **
## RefineryValero Refinery 5.897e+00 3.242e-01 18.185 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(month)) 7.998 8 2750 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0229 Deviance explained = 2.3%
## GCV = 9173.7 Scale est. = 9173.6 n = 1777112
plot(h2s_dm_model_a)
h2s_dm_model_b <- gam(H2S_daily_max~s(as.numeric(month),bs='cc')+year+wd_sec+ws+I(1/MinDist^2)+Refinery+s(latitude, bs = 'tp')+s(longitude, bs='tp'), data = full_data)
summary(h2s_dm_model_b)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## H2S_daily_max ~ s(as.numeric(month), bs = "cc") + year + wd_sec +
## ws + I(1/MinDist^2) + Refinery + s(latitude, bs = "tp") +
## s(longitude, bs = "tp")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.278e+03 7.783e+00 -164.226 < 2e-16 ***
## year2021 4.754e+00 5.471e+00 0.869 0.384956
## year2022 -7.489e-02 5.472e+00 -0.014 0.989080
## wd_secQ2 -2.293e-01 2.404e-01 -0.954 0.340185
## wd_secQ3 9.515e-01 2.615e-01 3.638 0.000275 ***
## wd_secQ4 2.505e-01 2.234e-01 1.121 0.262101
## ws -4.294e-02 3.005e-02 -1.429 0.152934
## I(1/MinDist^2) 5.494e+07 4.597e+05 119.509 < 2e-16 ***
## RefineryMarathon (Carson) 9.825e+02 5.680e+00 172.977 < 2e-16 ***
## RefineryMarathon (Wilmington) 1.287e+03 5.952e+00 216.151 < 2e-16 ***
## RefineryPhillips 66 (Wilimington) 1.718e+03 6.956e+00 246.937 < 2e-16 ***
## RefineryPhillips 66 (Wilmington) 1.799e+03 7.270e+00 247.406 < 2e-16 ***
## RefineryTorrance Refinery 2.327e+02 2.939e+00 79.186 < 2e-16 ***
## RefineryValero Refinery 1.687e+03 7.051e+00 239.284 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(month)) 7.999 8 3609 <2e-16 ***
## s(latitude) 1.000 1 69775 <2e-16 ***
## s(longitude) 1.000 1 25305 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.108 Deviance explained = 10.8%
## GCV = 8598 Scale est. = 8597.9 n = 1730935
plot(h2s_dm_model_b)
h2s_ma_model_a <- gam(H2S_monthly_average~s(as.numeric(month),bs='cc')+year+wd_sec+ws+I(1/MinDist^2)+Refinery, data = full_data)
summary(h2s_ma_model_a)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## H2S_monthly_average ~ s(as.numeric(month), bs = "cc") + year +
## wd_sec + ws + I(1/MinDist^2) + Refinery
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.562e+00 4.624e+00 0.987 0.323830
## year2021 1.076e+00 4.618e+00 0.233 0.815796
## year2022 -8.169e+00 4.618e+00 -1.769 0.076930 .
## wd_secQ2 -7.359e-01 1.981e-01 -3.714 0.000204 ***
## wd_secQ3 3.097e+00 2.157e-01 14.360 < 2e-16 ***
## wd_secQ4 -7.004e-01 1.846e-01 -3.794 0.000148 ***
## ws 1.185e-01 2.461e-02 4.815 1.47e-06 ***
## I(1/MinDist^2) 2.964e+04 1.569e+05 0.189 0.850144
## RefineryMarathon (Carson) 2.395e+01 2.453e-01 97.616 < 2e-16 ***
## RefineryMarathon (Wilmington) 4.224e+00 2.706e-01 15.613 < 2e-16 ***
## RefineryPhillips 66 (Wilimington) -2.615e-02 2.499e-01 -0.105 0.916656
## RefineryPhillips 66 (Wilmington) 3.524e+00 2.658e-01 13.262 < 2e-16 ***
## RefineryTorrance Refinery 1.570e+00 2.636e-01 5.956 2.58e-09 ***
## RefineryValero Refinery 6.305e+00 2.632e-01 23.960 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(month)) 7.999 8 3996 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0307 Deviance explained = 3.07%
## GCV = 6125.1 Scale est. = 6125 n = 1814982
plot(h2s_ma_model_a)
h2s_ma_model_b <- gam(H2S_monthly_average~s(as.numeric(month),bs='cc')+year+wd_sec+ws+I(1/MinDist^2)+Refinery+s(latitude, bs = 'tp')+s(longitude, bs='tp'), data = full_data)
summary(h2s_ma_model_b)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## H2S_monthly_average ~ s(as.numeric(month), bs = "cc") + year +
## wd_sec + ws + I(1/MinDist^2) + Refinery + s(latitude, bs = "tp") +
## s(longitude, bs = "tp")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.223e+03 3.807e+01 -32.123 < 2e-16 ***
## year2021 4.242e+00 4.442e+00 0.955 0.339588
## year2022 -5.266e-01 4.442e+00 -0.119 0.905645
## wd_secQ2 4.182e-01 1.951e-01 2.143 0.032118 *
## wd_secQ3 7.243e-01 2.123e-01 3.411 0.000647 ***
## wd_secQ4 -1.549e-01 1.814e-01 -0.854 0.393162
## ws 4.626e-02 2.439e-02 1.896 0.057917 .
## I(1/MinDist^2) 5.254e+07 4.773e+05 110.085 < 2e-16 ***
## RefineryMarathon (Carson) 9.403e+02 4.217e+01 22.300 < 2e-16 ***
## RefineryMarathon (Wilmington) 1.231e+03 4.360e+01 28.243 < 2e-16 ***
## RefineryPhillips 66 (Wilimington) 1.644e+03 4.218e+01 38.982 < 2e-16 ***
## RefineryPhillips 66 (Wilmington) 1.722e+03 4.316e+01 39.891 < 2e-16 ***
## RefineryTorrance Refinery 2.226e+02 3.196e+01 6.967 3.23e-12 ***
## RefineryValero Refinery 1.615e+03 4.467e+01 36.149 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(as.numeric(month)) 8 8 5187.30 <2e-16 ***
## s(latitude) 1 1 24696.07 <2e-16 ***
## s(longitude) 1 1 55.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.144 Deviance explained = 14.4%
## GCV = 5667 Scale est. = 5666.9 n = 1730935
plot(h2s_ma_model_b)